{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0. Current Loss: 0.14516235888004303.\n",
      "Epoch 1. Current Loss: 0.02736356295645237.\n",
      "Epoch 2. Current Loss: 0.036029402166604996.\n",
      "Epoch 3. Current Loss: 0.19768565893173218.\n",
      "Epoch 4. Current Loss: 0.010247528553009033.\n",
      "Epoch 5. Current Loss: 0.05027035251259804.\n",
      "Epoch 6. Current Loss: 0.0797794908285141.\n",
      "Epoch 7. Current Loss: 0.0055908383801579475.\n",
      "Epoch 8. Current Loss: 0.0030122234020382166.\n",
      "Epoch 9. Current Loss: 0.0014226468047127128.\n"
     ]
    }
   ],
   "source": [
    "import mxnet as mx\n",
    "from mxnet import gluon, autograd, ndarray\n",
    "import numpy as np\n",
    "\n",
    "# Download the MNIST dataset, then create the training and test sets \n",
    "train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=lambda data, label: (data.astype(np.float32)/255, label)),\n",
    "                                      batch_size=32, shuffle=True)\n",
    "test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=lambda data, label: (data.astype(np.float32)/255, label)),\n",
    "                                     batch_size=32, shuffle=False)\n",
    "# Initialize the model\n",
    "net = gluon.nn.Sequential()\n",
    "\n",
    "# Define the model architecture\n",
    "with net.name_scope():\n",
    "    net.add(gluon.nn.Dense(128, activation=\"relu\")) # 1st layer - 128 nodes\n",
    "    net.add(gluon.nn.Dense(64, activation=\"relu\")) # 2nd layer – 64 nodes\n",
    "    net.add(gluon.nn.Dense(10)) # Output layer, one for each number 0-9\n",
    "\n",
    "# We start with random values for all of the model’s parameters from a \n",
    "# normal distribution with a standard deviation of 0.05\n",
    "net.collect_params().initialize(mx.init.Normal(sigma=0.05))\n",
    "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()\n",
    "\n",
    "# We opt to use the stochastic gradient descent (sgd) training algorithm \n",
    "# and set the learning rate hyperparameter to .1\n",
    "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})\n",
    "\n",
    "# Loop through several epochs and watch the model improve\n",
    "epochs = 10\n",
    "for e in range(epochs):\n",
    "    for i, (data, label) in enumerate(train_data):\n",
    "        data = data.as_in_context(mx.cpu()).reshape((-1, 784))\n",
    "        label = label.as_in_context(mx.cpu())\n",
    "        with autograd.record(): # Start recording the derivatives\n",
    "            output = net(data) # the forward iteration\n",
    "            loss = softmax_cross_entropy(output, label)\n",
    "            loss.backward()\n",
    "        trainer.step(data.shape[0])\n",
    "        # Provide stats on the improvement of the model over each epoch\n",
    "        curr_loss = ndarray.mean(loss).asscalar()\n",
    "    print(\"Epoch {}. Current Loss: {}.\".format(e, curr_loss))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
